/*
 * Trips starting at station 3374 (the North station), counted per starting
 * hour and gender.
 *
 * The equality filter on t.start_station_id is meant to let the query use the
 * index on that column (assumed to exist -- confirm with SHOW INDEX FROM trips).
 *
 * station_months is collapsed to a single row per station before the join.
 * The table appears to hold several rows per station (presumably one per
 * month -- TODO confirm); joining it directly repeats every trip once per
 * such row and inflates COUNT(*).
 *
 * Every non-aggregated output column is listed in GROUP BY so the statement
 * is also valid when ONLY_FULL_GROUP_BY is enabled.
 */
SELECT COUNT(*) AS num_trips,
    HOUR(t.start_time) AS initial_time,
    t.gender,
    COUNT(DISTINCT DATE(t.start_time)) AS num_days,
    COUNT(DISTINCT sm.station_id) AS num_stations,
    sm.name, sm.avg_lon, sm.avg_lat
FROM trips t
INNER JOIN (
    SELECT station_id,
        MIN(name) AS name,
        AVG(avg_lon) AS avg_lon,
        AVG(avg_lat) AS avg_lat
    FROM station_months
    WHERE station_id = 3374
    GROUP BY station_id
) sm ON sm.station_id = t.start_station_id
WHERE t.start_station_id = 3374
AND t.gender <> 0 /* skip gender code 0 (presumably "unknown" -- confirm) */
GROUP BY initial_time, t.gender, sm.name, sm.avg_lon, sm.avg_lat;
/*
 * Trips starting at station 2006 (the South station), counted per starting
 * hour and gender.
 *
 * The equality filter on t.start_station_id is meant to let the query use the
 * index on that column (assumed to exist -- confirm with SHOW INDEX FROM trips).
 *
 * station_months is collapsed to a single row per station before the join.
 * The table appears to hold several rows per station (presumably one per
 * month -- TODO confirm); joining it directly repeats every trip once per
 * such row and inflates COUNT(*).
 *
 * Every non-aggregated output column is listed in GROUP BY so the statement
 * is also valid when ONLY_FULL_GROUP_BY is enabled.
 */
SELECT COUNT(*) AS num_trips,
    HOUR(t.start_time) AS initial_time,
    t.gender,
    COUNT(DISTINCT DATE(t.start_time)) AS num_days,
    COUNT(DISTINCT sm.station_id) AS num_stations,
    sm.name, sm.avg_lon, sm.avg_lat
FROM trips t
INNER JOIN (
    SELECT station_id,
        MIN(name) AS name,
        AVG(avg_lon) AS avg_lon,
        AVG(avg_lat) AS avg_lat
    FROM station_months
    WHERE station_id = 2006
    GROUP BY station_id
) sm ON sm.station_id = t.start_station_id
WHERE t.start_station_id = 2006
AND t.gender <> 0 /* skip gender code 0 (presumably "unknown" -- confirm) */
GROUP BY initial_time, t.gender, sm.name, sm.avg_lon, sm.avg_lat;
/*
 * Trips that start at station 3374 (North) and end at station 2006 (South),
 * counted per starting hour and gender.
 *
 * The equality filter on t.start_station_id is meant to let the query use the
 * index on that column (assumed to exist -- confirm with SHOW INDEX FROM trips).
 *
 * station_months is collapsed to a single row per station before the join.
 * The table appears to hold several rows per station (presumably one per
 * month -- TODO confirm); joining it directly repeats every trip once per
 * such row and inflates COUNT(*).
 *
 * Every non-aggregated output column is listed in GROUP BY so the statement
 * is also valid when ONLY_FULL_GROUP_BY is enabled.
 */
SELECT COUNT(*) AS num_trips,
    HOUR(t.start_time) AS initial_time,
    t.gender,
    COUNT(DISTINCT DATE(t.start_time)) AS num_days,
    COUNT(DISTINCT sm.station_id) AS num_stations,
    sm.name, sm.avg_lon, sm.avg_lat
FROM trips t
INNER JOIN (
    SELECT station_id,
        MIN(name) AS name,
        AVG(avg_lon) AS avg_lon,
        AVG(avg_lat) AS avg_lat
    FROM station_months
    WHERE station_id = 3374
    GROUP BY station_id
) sm ON sm.station_id = t.start_station_id
WHERE t.start_station_id = 3374
AND t.end_station_id = 2006
AND t.gender <> 0 /* skip gender code 0 (presumably "unknown" -- confirm) */
GROUP BY initial_time, t.gender, sm.name, sm.avg_lon, sm.avg_lat;
/*
 * Trips that start at station 2006 (South) and end at station 3374 (North),
 * counted per starting hour and gender.
 *
 * The equality filter on t.start_station_id is meant to let the query use the
 * index on that column (assumed to exist -- confirm with SHOW INDEX FROM trips).
 *
 * station_months is collapsed to a single row per station before the join.
 * The table appears to hold several rows per station (presumably one per
 * month -- TODO confirm); joining it directly repeats every trip once per
 * such row and inflates COUNT(*).
 *
 * Every non-aggregated output column is listed in GROUP BY so the statement
 * is also valid when ONLY_FULL_GROUP_BY is enabled.
 */
SELECT COUNT(*) AS num_trips,
    HOUR(t.start_time) AS initial_time,
    t.gender,
    COUNT(DISTINCT DATE(t.start_time)) AS num_days,
    COUNT(DISTINCT sm.station_id) AS num_stations,
    sm.name, sm.avg_lon, sm.avg_lat
FROM trips t
INNER JOIN (
    SELECT station_id,
        MIN(name) AS name,
        AVG(avg_lon) AS avg_lon,
        AVG(avg_lat) AS avg_lat
    FROM station_months
    WHERE station_id = 2006
    GROUP BY station_id
) sm ON sm.station_id = t.start_station_id
WHERE t.start_station_id = 2006
AND t.end_station_id = 3374
AND t.gender <> 0 /* skip gender code 0 (presumably "unknown" -- confirm) */
GROUP BY initial_time, t.gender, sm.name, sm.avg_lon, sm.avg_lat;
/*
 * One row per selected station (3374 and 2006) with its name and coordinates.
 *
 * name, avg_lat and avg_lon are wrapped in aggregates because the query
 * groups by station_id only: selecting them bare is rejected when
 * ONLY_FULL_GROUP_BY is enabled and otherwise returns values from an
 * arbitrary row of each group.
 */
SELECT station_id,
    MIN(name) AS name,
    AVG(avg_lat) AS lat,
    AVG(avg_lon) AS lon
  FROM station_months
  WHERE station_id IN (3374, 2006)
  GROUP BY station_id;
Rental Bike to see Central Park

Citibike Station In Central Park 1

North Station Vs South Station

Is biking gendered? In the busy, noisy concrete jungle that is NYC, Central Park can often seem like the only escape to greenery. One of the greatest things about Central Park is its size, which makes it a popular destination for New Yorkers to stroll, run, have picnics and, of course, ride bikes. Citi Bike launched on May 27, 2013, to help people navigate the hectic traffic of both motor vehicles and pedestrians. To get a better understanding of this trend that people are raving about, we explored a Citi Bike SQL database containing information about each Citi Bike station in NYC and the details of every individual Citi Bike ride taken in NYC in 2017.

There are several Citi Bike stations located near Central Park, but the two main access points are at the north and south ends of the park. Therefore, we selected the Citi Bike stations at Central Park North & Adam Clayton Powell Blvd (north of the park) and Central Park S & 6 Ave (south of the park) to analyze the popularity of these two locations by gender. According to the Citi Bike data, the maximum number of trips taken per year from the South Station is 59,604 for males and 19,596 for females. In contrast, the maximum number of trips taken from the North Station is 19,224 for males and 11,820 for females. Based on these statistics, it is safe to say that Citi Bikes are used more south of the park than north of it.

# Convert the station table to an sf point layer.
# The coordinates are longitude/latitude, so the layer is declared as WGS 84
# (EPSG:4326). Without a CRS, st_distance() further down would return
# unitless planar distances measured in degrees instead of metres.
points_sf <- sm_points %>%
  st_as_sf(coords = c("lon", "lat"), crs = 4326)

# Bicycle marker icon (Font Awesome glyph, white on blue)
icon.fa <- makeAwesomeIcon(
  icon = "bicycle", markerColor = "blue", library = "fa",
  iconColor = "white"
)

# Map of the stations: base tiles, a line through the station coordinates,
# and one marker per station that shows the station name when clicked
map <- leaflet() %>%
  addTiles() %>%
  addPolylines(data = sm_points, lat = ~lat, lng = ~lon) %>%
  addAwesomeMarkers(data = points_sf, popup = ~name, icon = icon.fa)
map

# Pairwise distance matrix between the station points.
# The matrix is symmetric with a zero diagonal, so each station pair appears
# twice; read the distance from a single off-diagonal entry rather than
# summing the whole matrix.
points_sf %>%
  st_distance()

In order to further evaluate gender differences and similarities between the North and South Stations, we assessed the number of trips across Central Park and the direction of those trips (north to south or south to north). We presumed that the majority of individuals who rent a Citi Bike at the North Station and drop it off at the South Station (and vice versa) are likely renting it with the intention of riding through the park. This assumption was not supported: there were only 924 trips from north to south and 1,620 trips from south to north. Compared with the total number of trips starting at Central Park North and Central Park South, these numbers are far smaller. Regardless of where most people (male or female) start their biking journey, the majority of those trips end at other locations.

# South station (Central Park S & 6 Ave): bar chart of num_trips against the
# starting hour, drawn as one panel per gender value.
hours_2006_bar <- central_park_south %>%
  ggplot(aes(x = initial_time, y = num_trips, fill = gender)) +
  # factor() makes the fill scale treat gender as discrete for this layer
  geom_col(aes(fill = factor(gender))) +
  facet_wrap(~gender) +
  labs(
    title = "Central Park S & 6 Ave Station: 
       Number of Users Based on Hour of Day",
    x = "Hours of a Day",
    y = "Average Number of Trips in 2017"
  ) +
  # White panel with a solid frame and dotted horizontal guide lines
  theme(
    panel.border = element_rect(linetype = "solid", fill = NA),
    panel.grid.major.y = element_line(color = "gray", linetype = "dotted"),
    panel.background = element_rect(fill = "white")
  )
hours_2006_bar

# North station (Central Park North & Adam Clayton Powell Blvd): bar chart of
# num_trips against the starting hour, drawn as one panel per gender value.
hours_3374_bar <- central_park_north %>%
  ggplot(aes(x = initial_time, y = num_trips, fill = gender)) +
  # factor() makes the fill scale treat gender as discrete for this layer
  geom_col(aes(fill = factor(gender))) +
  facet_wrap(~gender) +
  labs(
    title = "Central Park North & Adam Clayton Powell Blvd Station: 
       Number of Users Based on Hour of Day",
    x = "Hours of a Day",
    y = "Average Number of Trips in 2017"
  ) +
  # White panel with a solid frame and dotted horizontal guide lines
  theme(
    panel.border = element_rect(linetype = "solid", fill = NA),
    panel.grid.major.y = element_line(color = "gray", linetype = "dotted"),
    panel.background = element_rect(fill = "white")
  )
hours_3374_bar

[Figure 1]

We aggregated the number of trips taken by males and females by the hour of the day and averaged it over all the days in 2017. By doing so, we found that the maximum number of trips taken in the north was between 8:00 am and 9:00 am for both genders. Conversely, the maximum number of biking trips taken in the south was between 4:00 pm and 5:00 pm for both genders. This data reveals that males and females use Citibike at similar times of the day.

Interestingly, the peak biking hours for both genders in the North and South correspond with traditional business hours, 9:00 am to 5:00 pm. For this reason, we can speculate that the Central Park North station is a busier starting point in the morning hours because it is more residential, and people are most likely biking through the park to get to work or as part of a morning fitness regime. Central Park South, meanwhile, is a busier starting point in the afternoon because that marks the end of the traditional workday for most people.

# Replace the numeric gender code with a labelled factor:
# 1 becomes "Male", every other non-missing value becomes "Female".
central_park_north_south <- mutate(
  central_park_north_south,
  gender = factor(if_else(gender == 1, "Male", "Female"))
)

# Line-and-point chart of num_trips by starting hour for bikes rented at the
# North station and returned at the South station, one colour per gender.
hours3374_2006 <- ggplot(
  central_park_north_south,
  aes(x = initial_time, y = num_trips, color = gender)
) +
  geom_line() +
  geom_point() +
  # Axis tick every two hours
  scale_x_continuous(breaks = seq(0, 24, by = 2)) +
  labs(
    title = "From the North Station to the South Station: 
       Number of User Based on Hour of the Day",
    x = "Hours of a Day",
    y = "Average Number of Trips in 2017"
  ) +
  # White panel with a solid frame and dotted horizontal guide lines
  theme(
    panel.border = element_rect(linetype = "solid", fill = NA),
    panel.grid.major.y = element_line(color = "gray", linetype = "dotted"),
    panel.background = element_rect(fill = "white")
  )
hours3374_2006

# Replace the numeric gender code with a labelled factor:
# 1 becomes "Male", every other non-missing value becomes "Female".
central_park_south_north <- mutate(
  central_park_south_north,
  gender = factor(if_else(gender == 1, "Male", "Female"))
)

# Line-and-point chart of num_trips by starting hour for bikes rented at the
# South station and returned at the North station, one colour per gender.
hours_2006_3374 <- ggplot(
  central_park_south_north,
  aes(x = initial_time, y = num_trips, color = gender)
) +
  geom_line() +
  geom_point() +
  # Axis tick every two hours
  scale_x_continuous(breaks = seq(0, 24, by = 2)) +
  labs(
    title = "From the South Station to the North Station: 
       Number of User Based on Hour of the Day",
    x = "Hours of a Day",
    y = "Average Number of Trips in 2017"
  ) +
  # White panel with a solid frame and dotted horizontal guide lines
  theme(
    panel.border = element_rect(linetype = "solid", fill = NA),
    panel.grid.major.y = element_line(color = "gray", linetype = "dotted"),
    panel.background = element_rect(fill = "white")
  )
hours_2006_3374

[Figure 2]

Biker in Central Park

Biker in Central Park 2

From our group repository 3

Word count: 524


  1. Bklyner. “Dot Explores Expanding Bike Share Program Across NYC with Dockless System.” https://s3.amazonaws.com/bklyner/bklyner/wp-content/uploads/2015/07/fgf_citibike_park.jpg.

  2. Tripadvisor. “Rental Bike to See Central Park.” https://media-cdn.tripadvisor.com/media/photo-s/11/ee/e7/93/central-park.jpg

  3. GitHub. “SameeraJhunjhunwala/MP4_bikes.” https://github.com/SameeraJhunjhunwala/MP4_bikes